'''
Created on 5/01/2013

@author: Jorge
'''
import math

class BinormalSeparation(object):
    '''
    classdocs
    '''


    def __init__(self):
        '''
        Constructor
        '''
        pass
    
    def get_index_features(self, X, Y):
        indexes = []
        features = self.get_feature_score(X, Y)
        for f in features:
            if f.score>=0.5:
                indexes.append(f.index)
        return indexes
        
    def get_feature_score(self,X,Y):
        #print 'BinormalSeparation running'
        #print 'original dimension: ',len(X[0])
        classes = {}
        for i in range(len(Y)):
            try:
                (classes[Y[i]]).append(X[i])
            except KeyError:
                classes[Y[i]] = [ X[i] ]
        if len(classes.keys())!=2:
            raise ValueError( "dataset must have only two classes ->"+str(len(classes.keys())) )
        
        features = []
        first_class = classes[classes.keys()[0]]
        second_class = classes[classes.keys()[1]]
        n = len(X[0])
        for i in range(n):
            tp, fp = 0, 0
            for e in first_class:
                #print e
                if e[i]>0:
                    tp+=1
            for e in second_class:
                if e[i]>0:
                    fp+=1
            tpr = (tp+0.0)/len(first_class)
            fpr = (fp+0.0)/len(second_class)
            #print tpr, ' - ', fpr
            tpr += (0.0005 if tpr==0 else 0)
            tpr -= (0.0005 if tpr==1 else 0)
            fpr += (0.0005 if fpr==0 else 0)
            fpr -= (0.0005 if fpr==1 else 0)
            if tpr==0 or tpr==1 or fpr==1 or fpr==0: continue
            score = abs(self.__ltqnorm(tpr) - self.__ltqnorm(fpr))
            features.append(Feature(i, score))
        

        return features

    
    def __ltqnorm(self, p ):
        """
        Modified from the author's original perl code (original comments follow below)
        by dfield@yahoo-inc.com.  May 3, 2004.

        Lower tail quantile for standard normal distribution function.

        This function returns an approximation of the inverse cumulative
        standard normal distribution function.  I.e., given P, it returns
        an approximation to the X satisfying P = Pr{Z <= X} where Z is a
        random variable from the standard normal distribution.

        The algorithm uses a minimax approximation by rational functions
        and the result has a relative error whose absolute value is less
        than 1.15e-9.

        Author:      Peter John Acklam
        Time-stamp:  2000-07-19 18:26:14
        E-mail:      pjacklam@online.no
        WWW URL:     http://home.online.no/~pjacklam
        """

        if p <= 0 or p >= 1:
            # The original perl code exits here, we'll throw an exception instead
            raise ValueError( "Argument to ltqnorm %f must be in open interval (0,1)" % p )

        # Coefficients in rational approximations.
        a = (-3.969683028665376e+01,  2.209460984245205e+02, \
             -2.759285104469687e+02,  1.383577518672690e+02, \
             -3.066479806614716e+01,  2.506628277459239e+00)
        b = (-5.447609879822406e+01,  1.615858368580409e+02, \
             -1.556989798598866e+02,  6.680131188771972e+01, \
             -1.328068155288572e+01 )
        c = (-7.784894002430293e-03, -3.223964580411365e-01, \
             -2.400758277161838e+00, -2.549732539343734e+00, \
              4.374664141464968e+00,  2.938163982698783e+00)
        d = ( 7.784695709041462e-03,  3.224671290700398e-01, \
              2.445134137142996e+00,  3.754408661907416e+00)

        # Define break-points.
        plow  = 0.02425
        phigh = 1 - plow

        # Rational approximation for lower region:
        if p < plow:
            q  = math.sqrt(-2*math.log(p))
            return (((((c[0]*q+c[1])*q+c[2])*q+c[3])*q+c[4])*q+c[5]) / \
                   ((((d[0]*q+d[1])*q+d[2])*q+d[3])*q+1)

        # Rational approximation for upper region:
        if phigh < p:
            q  = math.sqrt(-2*math.log(1-p))
            return -(((((c[0]*q+c[1])*q+c[2])*q+c[3])*q+c[4])*q+c[5]) / \
                    ((((d[0]*q+d[1])*q+d[2])*q+d[3])*q+1)

        # Rational approximation for central region:
        q = p - 0.5
        r = q*q
        return (((((a[0]*r+a[1])*r+a[2])*r+a[3])*r+a[4])*r+a[5])*q / \
               (((((b[0]*r+b[1])*r+b[2])*r+b[3])*r+b[4])*r+1)
               
               
class Feature:
    
    def __init__(self, index, score):
        self.index = index
        self.score = score
        
    def __cmp__(self, other):
        return cmp(self.score, other.score)